#obtain data
ext.temp<-read.table('ecol 562/coral cores.txt',sep=',',header=T)

#stack extension rates from individual cores in one column
#create year and core variables
ext.rates<-data.frame(unlist(ext.temp[,2:15]),rep(ext.temp$Year,14),rep(names(ext.temp)[2:15],rep(nrow(ext.temp),14)))
names(ext.rates)<-c('Ext.rate','Year','Core')

#create identifier of the Core the data came from
ext.rates$reef.type<-substr(ext.rates$Core,1,2)
#sort data in core and year order
ext.rates1<-ext.rates[order(ext.rates$Core,ext.rates$Year),]
#remove missing observations introduced by Excel
ext.rates2<-ext.rates1[!is.na(ext.rates1$Ext.rate),]

#examine the core time series
library(lattice)
xyplot(Ext.rate~Year|Core, data=ext.rates2,type='o')

#fit a common regreession model
model1<-lm(Ext.rate~Year, data=ext.rates2)
coef(model1)
#fit a common regression model with year centered at 1967
model1<-lm(Ext.rate~I(Year-1967), data=ext.rates2)
coef(model1)
#fit a model in which each reef type has its own regression model
model2<-lm(Ext.rate~factor(reef.type)+I(Year-1967)+factor(reef.type):I(Year-1967), data=ext.rates2)
#fit a model in which each reef type has its own slope but each core has its own intercept
model3<-lm(Ext.rate~Core+I(Year-1967)+factor(reef.type):I(Year-1967), data=ext.rates2)
#fit a model in which each core has its own slope and intercept
model4<-lm(Ext.rate~Core+I(Year-1967)+Core:I(Year-1967), data=ext.rates2)
#compare the models
AIC(model1,model2,model3,model4)
#test the separate slope by core against the separate slope by reef type models
anova(model3,model4)

#locate starting year for each core time series
tapply(ext.rates2$Year,ext.rates2$Core,function(x) x[1])

#split residuals into separate groups by Core
list.resids<-split(residuals(model4),ext.rates2$Core)
length(list.resids)
list.resids[[1]]

#insert 30 missing values between the residuals from different cores
long.resids<-list.resids[[1]]
for(i in 2:length(list.resids)){
long.resids<-c(long.resids,rep(NA,30))
long.resids<-c(long.resids,list.resids[[i]])}

long.resids[101:150]

#examine the ACF out to lag 30
acf(long.resids, na.action=na.pass, lag.max=30, ci=1-.05/30, ylim=c(-.2,.2), main=list('ACF plot',cex=1))

#displayed N for confidence bands is incorrect
#calculate sample size at each lag

#number of observations used in lag 30
sum((table(ext.rates2$Core)-30)*((table(ext.rates2$Core)-30)>0))
#number of observations used at each lag
sapply(0:30,function(k) sum((table(ext.rates2$Core)-k)*((table(ext.rates2$Core)-k)>0)))->N.samp

#draw ACF with correct confidence bands
acf(long.resids, na.action=na.pass, lag.max=30, ci.col='white', ylim=c(-.2,.2), main=list('ACF plot',cex=1))
lines(0:30,-qnorm(1-.025/30)/sqrt(N.samp), col=2, lty=2)
lines(0:30,qnorm(1-.025/30)/sqrt(N.samp), col=2, lty=2)

#draw PACF with correct confidence bands
pacf(long.resids, na.action=na.pass, lag.max=30, ci.col='white', ylim=c(-.2,.2), main=list('partialACF plot',cex=1))
lines(0:30,qnorm(1-.025/30)/sqrt(N.samp), col=2, lty=2)
lines(0:30,-qnorm(1-.025/30)/sqrt(N.samp), col=2, lty=2)

############# Adding periodic terms ##############

#determine the best period for a trigonometric model
my.aic<-matrix(NA,ncol=2,nrow=60)
for(k in 1:60) {
update(model4,.~.+cos(2*pi/k*Year)+sin(2*pi/k*Year))->tmodel4
my.aic[k,]<-c(k,AIC(tmodel4))}
plot(my.aic[,1],my.aic[,2],type='l',xlab='Period',ylab='AIC')
which.min(my.aic[,2])
my.aic[36,]

#add periodic term to regression model with best period
update(model4,.~.+cos(2*pi/36*Year)+sin(2*pi/36*Year))->period.model4
AIC(model4,period.model4)

#check to see if periodicity in ACF is removed
list.resids2<-split(residuals(period.model4),ext.rates2$Core)
long.resids2<-list.resids2[[1]]
 for(i in 2:length(list.resids2)){
 long.resids2<-c(long.resids2,rep(NA,30))
 long.resids2<-c(long.resids2,list.resids2[[i]])}

acf(long.resids2, na.action=na.pass, lag.max=30, ci.col='white', ylim=c(-.2,.2), main=list('ACF plot',cex=1))
lines(0:30,-qnorm(1-.025/30)/sqrt(N.samp), col=2, lty=2)
lines(0:30,qnorm(1-.025/30)/sqrt(N.samp), col=2, lty=2)


################ Adding a correlation structure for residuals ###########

#refit model 4 using the gls function of nlme package
library(nlme)
gmodel4<-gls(Ext.rate~Core+I(Year-1967)+Core:I(Year-1967), data=ext.rates2,method='ML',na.action=na.omit)
AIC(model4,gmodel4)

#try fitting an AR(1) model based on previous ACF and PACF
update(gmodel4,correlation=corARMA(p=1,q=0,form=~I(Year-1967)|Core))->gmodel4.1
AIC(gmodel4,gmodel4.1)

#try fitting a range of ARMA(p,q) models and saving the AIC

cor.results<-NULL
for(p in 0:3) {
   for(q in 0:3) {
if(p>0 | q>0) {
cor.temp<-gls(Ext.rate~Core+I(Year-1967)+Core:I(Year-1967),data=ext.rates2,method='ML', na.action=na.omit, correlation=corARMA(p=p,q=q,form=~I(Year-1967)|Core))
cor.results<-rbind(cor.results,c(p, q, logLik(cor.temp),AIC(cor.temp)))}
}}

#the ARMA(2,2) model did not converge and loop aborted
#the ARMA(1,1) model has the lowest AIC of the models that converged
cor.results

#add the ARMA(1,1) structure to themodel
update(gmodel4,correlation=corARMA(p=1,q=1,form=~I(Year-1967)|Core))->gmodel4.2

#refit model 3 using an ARMA(1,1) structure
gmodel3<-gls(Ext.rate~Core+I(Year-1967)+factor(reef.type):I(Year-1967), data=ext.rates2,method='ML',na.action=na.omit)
update(gmodel3,correlation=corARMA(p=1,q=1,form=~I(Year-1967)|Core))->gmodel4.3

#previously model3 and model 4 were significantly different
anova(gmodel3,gmodel4)
#after adding correlation structure they are no longer different
anova(gmodel4.2,gmodel4.3)